clear

// Year suffix of the CBP files to process, passed as the first argument of the do-file.
// It is compared against 86 below and embedded in the raw file names.
local y `1'

*local y 96

// Stop with an explicit message when no year was supplied; otherwise the
// comparison below expands to "if  < 86" and fails with a bare syntax error.
if "`y'" == "" {
	di as error "year argument missing: call this do-file with the two-digit CBP year"
	exit 198
}

// For data coming from the archives, need to extract by census division.
// Files cbp_<y>_1.txt ... cbp_<y>_9.txt are fixed-width; each one is read with
// the same column layout and stacked into a single dataset.
if `y' < 86 {

	tempfile temp

	forval i = 1/9 {

		di as text "Extracting file " `i'

		// siccode2 and flag are read as strings, everything else as numeric.
		infix state2 1-2 county2 3-5 fill1 6 str siccode2 7-10 fill2 11 str flag 12 tempmm 13-24 tpayq1 25-36 ///
			  tanpay 37-48 testab 49-54 ctyemp1 55-60 ctyemp2 61-66  ctyemp3 67-72 ctyemp4 73-78 ctyemp5 79-84 ///
			  ctyemp6 85-90 ctyemp7 91-96 ctyemp8 97-102 ctyemp9 103-108 ctyemp10 109-114 ctyemp11 115-120 ctyemp12 121-126 ///
			  ctyemp13 127-132 fill3 133 ssastat2 134-135 ssacty2 136-138 fill4 139 fipstate 140-141 fipscty2 142-144 fill5 145 ///
			  using "${raw}/CBP/cbp_`y'_`i'.txt"

		// The accumulator file does not exist on the first pass, so only append
		// from the second file onward. The append is deliberately NOT wrapped in
		// capture: a failed append followed by "save, replace" would silently
		// throw away every file read so far.
		if `i' > 1 {
			append using "`temp'"
		}
		save "`temp'", replace
		clear

	}

	// Reload the stacked files as the working dataset.
	use "`temp'", clear

}

if `y' >= 86 {

	// Read the comma-delimited county file for this year; the first row holds the variable names.
	// NOTE(review): everything below assumes the file has columns named sic, fipscty,
	// empflag, emp, est and the n* size classes -- confirm for every year this is run on.
	insheet using "${raw}/CBP/cbp`y'co.txt", comma names
	cd $temp

	// Fixes to make raw files run with Autor-Dorn code:
	// give the columns the names used by the fixed-width (pre-1986) layout.
	gen siccode2 = sic
	rename fipscty fipscty2
	rename empflag flag
	rename emp     tempmm
	rename est     testab

	// Establishment-count size classes, in file order, become ctyemp1 ... ctyemp13.
	local sizevars n1_4 n5_9 n10_19 n20_49 n50_99 n100_249 n250_499 n500_999 n1000 n1000_1 n1000_2 n1000_3 n1000_4
	local k = 0
	foreach v of local sizevars {
		local ++k
		rename `v' ctyemp`k'
	}

	* change in siccode for manufacturing subtotal
	replace siccode2 = "19--" if siccode2 == "20--"

}



// Quoted so that a working directory containing spaces is accepted.
cd "$temp"

// Autor-Dorn code starts here


*******************************************************************
* Geographic selection
*******************************************************************

* drop Alaska and Hawaii
*drop if fipstate==2 | fipstate==15

* several observations have zeroes for all variables and are dropped
drop if fipstate==0

// NOTE(review): when the pre-1986 branch ran, no variable is named exactly "sic";
// the references to sic below then resolve to siccode2 through Stata's variable-name
// abbreviation (needs varabbrev on), and the saved file carries siccode2 instead of
// sic. Left unchanged to keep the output schema as is -- confirm downstream expects it.

// To get total 4 digit counts,
// need to detect and remove 3 and 4 digit double counting.
// code4 = numeric industry code, kept only when its last digit is non-zero
// (codes that do not convert to a number become missing through destring, force).
gen code4 = sic
destring code4, replace force
replace code4 = . if mod(code4, 10) == 0

// code3 = first three digits of the numeric code, missing when the code is a multiple of 100.
gen code3 = sic
destring code3, replace force
replace code3 = . if mod(code3, 100) == 0
replace code3 = floor(code3/10)

gen countyid = fipstate*1000 + fipscty2

// Within each county x code3 cell, subtract the sum over rows that have a code4
// from the rows that do not, for the establishment total and every size class.
// Scratch variables are tempvars: the previous "drop temp*" also matched tempmm.
foreach var of varlist testab ctyemp* {

	tempvar sub_sum sub_all
	gegen `sub_sum' = sum(`var') if code4 != ., by(countyid code3)
	gegen `sub_all' = mean(`sub_sum'), by(countyid code3)

	replace `var' = `var' - `sub_all' if code4 == . & `sub_all' != .
	drop `sub_sum' `sub_all'
}

keep if !missing(code3)

destring sic, force replace
drop if sic == .

// state2 already exists when the fixed-width files were read; otherwise build it from censtate.
capture confirm variable state2, exact
if _rc {
	gen state2 = censtate
}

keep countyid sic testab ctye*  flag state2

// Four-digit year: suffixes up to 20 are treated as 20xx, larger ones as 19xx.
// Computed directly from the argument so the file name is right even if no rows remain.
local h = cond(`y' <= 20, 2000, 1900) + `y'
gen year = `h'

save "${wd}/cbp/CPB_`h'_estab.dta", replace
